Property-Based Testing with Hypothesis
Test properties of code with generated inputs, not just examples.
Why Property Testing?
# Example-based: tests specific cases
def test_sort_examples():
    """Check ``sort`` against a few hand-picked inputs."""
    assert sort([3, 1, 2]) == [1, 2, 3]
    assert sort([]) == []  # empty input
    assert sort([1]) == [1]  # single element
# Property-based: tests properties for ANY input
from hypothesis import given
from hypothesis import strategies as st
@given(st.lists(st.integers()))
def test_sort_properties(lst):
    """Check properties that must hold for ``sort`` on any list of integers."""
    result = sort(lst)
    # Property 1: Same length
    assert len(result) == len(lst)
    # Property 2: Sorted order (every adjacent pair is non-decreasing)
    assert all(a <= b for a, b in zip(result, result[1:]))
    # Property 3: Same elements (compared as multisets, so this check does
    # not depend on the order of ``result``)
    assert sorted(result) == sorted(lst)
Basic Hypothesis Usage
from hypothesis import given, settings, assume
from hypothesis import strategies as st
@given(st.integers(), st.integers())
def test_addition_commutative(a, b):
    """Addition is commutative."""
    assert a + b == b + a
@given(st.text())
def test_reverse_twice(s):
    """Reversing twice returns original."""
    assert s[::-1][::-1] == s
@given(st.lists(st.integers(), min_size=1))
def test_max_in_list(lst):
    """Max is an element of the list."""
    # min_size=1 keeps the list non-empty, so max() never sees an empty list.
    assert max(lst) in lst
Common Strategies
from hypothesis import strategies as st
# Primitives
st.integers() # Any integer
st.integers(min_value=0) # Non-negative
st.floats(allow_nan=False) # Floats without NaN (infinities still allowed)
st.text() # Unicode strings
st.text(alphabet="abc", max_size=10) # Up to 10 characters drawn from "a", "b", "c"
st.booleans() # True or False
st.none() # Always None
st.binary() # Bytes
# Collections
st.lists(st.integers()) # List of ints
st.lists(st.text(), min_size=1, max_size=10) # List of 1 to 10 strings
st.sets(st.integers()) # Set of ints
st.frozensets(st.text()) # Frozenset of strings
st.dictionaries(st.text(), st.integers()) # Dict with str keys and int values
# Tuples
st.tuples(st.integers(), st.text()) # Fixed structure
st.tuples(st.integers(), st.integers(), st.integers()) # Triple of ints
# Optional / One of
st.one_of(st.integers(), st.text()) # Either type
st.none() | st.integers() # Optional int
st.sampled_from(["red", "green", "blue"]) # Enum-like
Building Custom Strategies
from hypothesis import strategies as st
from dataclasses import dataclass
@dataclass
class User:
    """Simple user record used as the target of a custom strategy."""

    name: str
    age: int
    email: str
# Strategy that builds User instances: each keyword argument is drawn from
# its own strategy and passed to the User constructor.
user_strategy = st.builds(
    User,
    name=st.text(min_size=1, max_size=50),
    age=st.integers(min_value=0, max_value=150),
    email=st.emails(),
)
@given(user_strategy)
def test_user_validation(user):
    """Every user produced by ``user_strategy`` passes ``validate_user``."""
    assert validate_user(user)
# Composite strategies for complex logic
@st.composite
def sorted_lists(draw):
    """Generate pre-sorted lists."""
    lst = draw(st.lists(st.integers()))
    return sorted(lst)
@given(sorted_lists())
def test_binary_search(sorted_lst):
    """An element taken from the list is always found by ``binary_search``."""
    # Empty lists have no element to search for, so they pass trivially.
    if sorted_lst:
        target = sorted_lst[len(sorted_lst) // 2]
        assert binary_search(sorted_lst, target) != -1
# Dependent strategies
@st.composite
def list_and_index(draw):
    """Generate a list and valid index into it."""
    lst = draw(st.lists(st.integers(), min_size=1))
    # The index range depends on the list drawn above, which is why this
    # needs a composite strategy rather than two independent ones.
    index = draw(st.integers(min_value=0, max_value=len(lst) - 1))
    return lst, index
@given(list_and_index())
def test_indexing(data):
    """The generated index is always a valid position in the generated list."""
    lst, index = data
    assert 0 <= index < len(lst)
    # This will never raise IndexError
    assert lst[index] in lst
Filtering and Assumptions
from hypothesis import given, assume
from hypothesis import strategies as st
# Filter strategy (preferred when possible)
@given(st.integers().filter(lambda x: x % 2 == 0))
def test_even_numbers(n):
    """Only even integers get through the strategy-level filter."""
    assert n % 2 == 0
# assume() for runtime filtering
@given(st.integers(), st.integers())
def test_division(a, b):
    """Floor division and modulo reconstruct the dividend."""
    assume(b != 0)  # Skip if b is 0
    assert (a // b) * b + (a % b) == a
# Combining filters: min_value=1 bounds the range, then filter() discards
# the odd values that remain
positive_even = st.integers(min_value=1).filter(lambda x: x % 2 == 0)
Settings and Configuration
from hypothesis import given, settings, Verbosity, Phase
from hypothesis import strategies as st
# Per-test settings
@settings(max_examples=500)  # More examples (default: 100)
@given(st.integers())
def test_thorough(n):
    """Placeholder test that runs with a raised example count."""
    pass
@settings(deadline=None)  # Disable timing check
@given(st.lists(st.integers()))
def test_slow_operation(lst):
    """Run ``expensive_operation`` without the per-example deadline."""
    expensive_operation(lst)
@settings(
    max_examples=1000,
    verbosity=Verbosity.verbose,
    phases=[Phase.generate],  # Skip shrinking
)
@given(st.text())
def test_verbose(s):
    """Placeholder test: verbose output, generate phase only."""
    pass
# Profile for CI (in conftest.py)
from hypothesis import settings, Verbosity
settings.register_profile("ci", max_examples=1000) # Many examples per test for CI
settings.register_profile("dev", max_examples=10) # Few examples for quick local runs
settings.register_profile("debug", max_examples=10, verbosity=Verbosity.verbose) # Few examples, verbose output
# Use: pytest --hypothesis-profile=ci
Stateful Testing
from hypothesis.stateful import RuleBasedStateMachine, rule, invariant
from hypothesis import strategies as st
class DatabaseMachine(RuleBasedStateMachine):
    """Test database operations maintain invariants.

    Each rule applies the same operation to a plain dict (the model) and to
    ``RealDatabase`` (the system under test) so the two can be compared.
    """

    def __init__(self):
        super().__init__()
        self.db = {}  # Model
        self.real_db = RealDatabase()  # System under test

    @rule(key=st.text(), value=st.integers())
    def set_value(self, key, value):
        """Set a value in both model and real DB."""
        self.db[key] = value
        self.real_db.set(key, value)

    @rule(key=st.text())
    def get_value(self, key):
        """Get value should match model."""
        expected = self.db.get(key)
        actual = self.real_db.get(key)
        assert expected == actual

    @rule(key=st.text())
    def delete_value(self, key):
        """Delete from both."""
        # pop() with a default so deleting an absent key is not an error
        # on the model side.
        self.db.pop(key, None)
        self.real_db.delete(key)

    @invariant()
    def keys_match(self):
        """Keys should always match."""
        assert set(self.db.keys()) == set(self.real_db.keys())
# Run stateful tests: expose the machine's generated TestCase class under a
# Test* name so the test runner can collect it
TestDatabase = DatabaseMachine.TestCase
pytest Integration
# conftest.py
import os

from hypothesis import settings, Verbosity, Phase

# Default profile for all tests
settings.register_profile("default", max_examples=100)

# CI profile - more examples, deterministic
settings.register_profile(
    "ci",
    max_examples=500,
    derandomize=True,  # Deterministic for CI
)

# Load the profile named by HYPOTHESIS_PROFILE, or "default" when unset
settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "default"))
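# pytest.ini (alternative to the HYPOTHESIS_PROFILE lookup in conftest.py;
# the --hypothesis-profile option is applied by the pytest plugin and is
# expected to take precedence over a profile loaded in conftest.py)
# [pytest]
# addopts = --hypothesis-profile=default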
Shrinking Examples
from hypothesis import given, settings
from hypothesis import strategies as st
@given(st.lists(st.integers()))
def test_shrinking_demo(lst):
    """Hypothesis shrinks failing inputs to minimal examples."""
    # This will fail, but Hypothesis finds minimal case
    assert sum(lst) < 100
# Hypothesis will shrink to something like:
# Falsifying example: test_shrinking_demo(lst=[100])
# Not: test_shrinking_demo(lst=[3847, -293, 10293, ...])
Common Patterns
# Roundtrip / Encode-Decode
@given(st.binary())
def test_compression_roundtrip(data):
    """Decompressing compressed bytes returns the original bytes."""
    assert decompress(compress(data)) == data
@given(st.dictionaries(st.text(), st.integers()))
def test_json_roundtrip(d):
    """Serialising to JSON and parsing back returns an equal dict."""
    assert json.loads(json.dumps(d)) == d
# Oracle testing (compare implementations)
@given(st.lists(st.integers()))
def test_sort_vs_stdlib(lst):
    """``my_sort`` agrees with the built-in ``sorted`` used as an oracle."""
    assert my_sort(lst) == sorted(lst)
# Metamorphic relations
@given(st.lists(st.integers()))
def test_sort_idempotent(lst):
    """Sorting twice equals sorting once."""
    assert sort(sort(lst)) == sort(lst)
@given(st.lists(st.integers()), st.integers())
def test_sort_append(lst, x):
    """Appending and sorting vs inserting sorted."""
    # Pre-sorting the list before appending x must not change the final result.
    assert sort(lst + [x]) == sort(sorted(lst) + [x])
Quick Reference
| Strategy | Description |
|----------|-------------|
| `st.integers()` | Any integer |
| `st.floats()` | Floats (configure nan, inf) |
| `st.text()` | Unicode strings |
| `st.binary()` | Byte strings |
| `st.lists(st.X())` | Lists of X |
| `st.dictionaries(k, v)` | Dict with key/value strategies |
| `st.builds(Class, ...)` | Build objects |
| `st.one_of(a, b)` | Either a or b |
| `st.sampled_from([...])` | Pick from list |
| `@st.composite` | Custom strategy |
| Setting | Purpose |
|---------|---------|
| `max_examples=N` | Number of test cases |
| `deadline=None` | Disable timing |
| `derandomize=True` | Reproducible runs |
| `verbosity=Verbosity.verbose` | Debug output |