PostgreSQL Operations Reference

Table of Contents

  1. Backup Strategies
  2. Vacuum Deep Dive
  3. Monitoring
  4. Connection Pooling

Backup Strategies

pg_dump

Logical backup of a single database. Consistent snapshot via a single transaction. Does not back up roles, tablespaces, or server-level configuration.

# Custom format (-Fc): compressed, parallel-restorable, most versatile
pg_dump -h localhost -U postgres -d mydb -Fc -f mydb.dump

# Plain SQL format (-Fp): human-readable, pipe-friendly, not parallel-restorable
pg_dump -h localhost -U postgres -d mydb -Fp -f mydb.sql

# Directory format (-Fd): one file per table, supports parallel dump and restore
pg_dump -h localhost -U postgres -d mydb -Fd -f mydb_dir/

# Parallel dump (directory format required, -j = number of workers)
pg_dump -h localhost -U postgres -d mydb -Fd -j 4 -f mydb_dir/

# Compressed with explicit compression level (PG16+ supports --compress=lz4)
pg_dump -h localhost -U postgres -d mydb -Fc --compress=9 -f mydb.dump

# Dump only specific tables
pg_dump -h localhost -U postgres -d mydb -Fc -t orders -t customers -f subset.dump

# Dump only schema (no data)
pg_dump -h localhost -U postgres -d mydb -Fc --schema-only -f schema.dump

# Dump only data (no DDL)
pg_dump -h localhost -U postgres -d mydb -Fc --data-only -f data.dump

# Exclude specific tables (e.g., large log tables)
pg_dump -h localhost -U postgres -d mydb -Fc -T audit_logs -T event_stream -f mydb.dump
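
A minimal nightly dump wrapper, as a sketch; the backup directory, database name, and 7-day retention window are assumptions to adapt:

#!/bin/bash
# nightly_dump.sh: date-stamped custom-format dump with 7-day retention
set -euo pipefail
BACKUP_DIR=/backup/dumps    # assumed location
DB=mydb
mkdir -p "$BACKUP_DIR"
pg_dump -h localhost -U postgres -d "$DB" -Fc \
    -f "$BACKUP_DIR/${DB}_$(date +%Y%m%d).dump"
find "$BACKUP_DIR" -name "${DB}_*.dump" -mtime +7 -delete   # prune old dumps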

pg_restore

# Restore custom/directory format
pg_restore -h localhost -U postgres -d mydb_restore -Fc mydb.dump

# Parallel restore (directory format)
pg_restore -h localhost -U postgres -d mydb_restore -Fd -j 4 mydb_dir/

# Restore single table from full dump
pg_restore -h localhost -U postgres -d mydb -t orders mydb.dump

# Two-phase restore: schema first, then data (note: indexes created in the
# schema phase slow the data load; --section=pre-data/data/post-data gives finer control)
pg_restore -h localhost -U postgres -d mydb --schema-only mydb.dump
pg_restore -h localhost -U postgres -d mydb --data-only mydb.dump

# --no-owner / --no-privileges: skip ownership and ACL statements
pg_restore -h localhost -U postgres -d mydb --no-owner --no-privileges mydb.dump

pg_dumpall

Backs up all databases plus server-level objects (roles, tablespaces). Output is always plain SQL (no custom/directory format support).

# Full cluster backup
pg_dumpall -h localhost -U postgres -f cluster_backup.sql

# Globals only (roles and tablespaces, no database data)
pg_dumpall -h localhost -U postgres --globals-only -f globals.sql

# Restore
psql -h localhost -U postgres -f cluster_backup.sql

pg_basebackup

Physical backup of the entire cluster; the standard basis for PITR and for seeding streaming replicas. Much faster than pg_dump for large databases since it copies raw data files.

# Basic base backup (plain format, WAL streamed during backup)
pg_basebackup -h localhost -U replicator -D /backup/base -P

# Include WAL files in backup (-Xs = stream WAL during backup)
pg_basebackup -h localhost -U replicator -D /backup/base -Xs -P

# Tar format with gzip compression (one .tar.gz per tablespace)
pg_basebackup -h localhost -U replicator -D /backup/base -Ft -z -P

# Tar format with LZ4 (PG15+, faster than gzip)
pg_basebackup -h localhost -U replicator -D /backup/base -Ft --compress=lz4 -P

# Checkpoint mode: fast = force immediate checkpoint, spread = rate-limited I/O
pg_basebackup -h localhost -U replicator -D /backup/base -Xs --checkpoint=fast -P

# Required postgresql.conf settings on the source server:
# wal_level = replica          (minimum)
# max_wal_senders = 3          (-Xs uses two senders per backup)
# archive_mode = on            (needed for PITR, not for the backup itself)

PITR: Point-in-Time Recovery

PITR combines a base backup with WAL archive segments to restore to any point in time.

WAL Archiving Setup

# postgresql.conf settings
wal_level = replica
archive_mode = on
archive_command = 'test ! -f /wal_archive/%f && cp %p /wal_archive/%f'
# %p = full path to WAL file, %f = filename only

# With AWS S3 (prefer a dedicated tool such as WAL-G or pgBackRest in production)
archive_command = 'aws s3 cp %p s3://my-bucket/wal-archive/%f'

# Verify archive is working
SELECT pg_switch_wal();  -- force WAL segment switch to test archive_command
-- Check /wal_archive for newly archived segment files
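
The pg_stat_archiver view tracks archiver progress; a nonzero failed_count means archive_command is failing:

-- Archiver status
SELECT archived_count, last_archived_wal, last_archived_time,
       failed_count, last_failed_wal, last_failed_time
FROM pg_stat_archiver;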

Recovery Configuration

Create recovery.signal file in PGDATA to trigger recovery mode (PG12+). Recovery parameters go in postgresql.conf (PG12+) or recovery.conf (pre-PG12).

# postgresql.conf additions for recovery
restore_command = 'cp /wal_archive/%f %p'
# or from S3:
restore_command = 'aws s3 cp s3://my-bucket/wal-archive/%f %p'

# Recovery target options (pick one):
recovery_target_time = '2024-03-15 14:30:00'        # time-based
recovery_target_xid = '1234567'                       # transaction ID
recovery_target_lsn = '0/15D5A50'                    # LSN
recovery_target_name = 'before_migration'             # named restore point
recovery_target = 'immediate'                         # as soon as consistent

# After reaching target:
recovery_target_action = 'pause'     # default: pause so you can inspect, then pg_wal_replay_resume()
recovery_target_action = 'promote'   # promote to primary immediately
recovery_target_action = 'shutdown'  # stop the server after recovery

# Named restore points (create before risky operations)
SELECT pg_create_restore_point('before_bulk_delete');

# Full PITR procedure:
# 1. Stop PostgreSQL
# 2. Move PGDATA aside: mv /var/lib/postgresql/14/main /var/lib/postgresql/14/main.bak
# 3. Restore base backup: pg_basebackup ... or extract tar
# 4. Add recovery settings to postgresql.conf
# 5. Touch recovery.signal: touch $PGDATA/recovery.signal
# 6. Start PostgreSQL -- it will replay WAL until target, then promote
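
The same procedure as a shell sketch; the paths, PostgreSQL version, and tar packaging from pg_basebackup -Ft are assumptions, adapt to your layout:

# Run as root (fix data directory ownership before starting)
sudo systemctl stop postgresql
mv /var/lib/postgresql/14/main /var/lib/postgresql/14/main.bak
mkdir -m 700 /var/lib/postgresql/14/main
tar -xf /backup/base/base.tar -C /var/lib/postgresql/14/main    # pg_basebackup -Ft output
chown -R postgres:postgres /var/lib/postgresql/14/main
# ...add restore_command and a recovery_target_* to postgresql.conf...
touch /var/lib/postgresql/14/main/recovery.signal
sudo systemctl start postgresql    # replays WAL to target, then follows recovery_target_action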

Backup Verification

# pg_verifybackup (PG13+): verify base backup integrity
pg_verifybackup /backup/base

# Skip per-file checksums (faster; the manifest structure is still validated)
pg_verifybackup --skip-checksums /backup/base

# Test restore (do this regularly in staging)
pg_restore --list mydb.dump | head -20   # check contents without restoring

# Verify dump readability
pg_restore -l mydb.dump > /dev/null && echo "Dump is readable"
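
A scratch restore test, as a sketch; the throwaway database name and the sanity-check table are assumptions:

# Restore into a throwaway database and spot-check a table
createdb -h localhost -U postgres restore_test
pg_restore -h localhost -U postgres -d restore_test --no-owner mydb.dump
psql -h localhost -U postgres -d restore_test -c "SELECT count(*) FROM orders;"
dropdb -h localhost -U postgres restore_test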

Vacuum Deep Dive

Regular VACUUM vs VACUUM FULL vs pg_repack

Regular VACUUM: marks dead tuples as reusable space. Does not shrink table on disk. Non-blocking (shares table with readers and writers). Run this routinely.

VACUUM FULL: rewrites the entire table to a new file, reclaiming disk space and eliminating table and index bloat. Requires an exclusive lock (blocks all access). Rarely needed if autovacuum is tuned correctly.

pg_repack: rewrites table without long exclusive lock (builds new table in background, swaps at end with brief lock). Preferred over VACUUM FULL for large production tables.

-- Regular VACUUM (non-blocking)
VACUUM orders;

-- VACUUM with ANALYZE (update statistics too)
VACUUM ANALYZE orders;

-- VERBOSE output to understand what was cleaned
VACUUM VERBOSE orders;

-- VACUUM FULL (requires AccessExclusiveLock -- schedule maintenance window)
VACUUM FULL orders;

-- Preview what needs vacuuming via dead tuple statistics
SELECT relname, n_dead_tup, n_live_tup,
       round(100.0 * n_dead_tup / nullif(n_live_tup + n_dead_tup, 0), 2) AS dead_pct,
       last_vacuum, last_autovacuum
FROM pg_stat_user_tables
ORDER BY n_dead_tup DESC;

# pg_repack (must install extension)
pg_repack -h localhost -U postgres -d mydb -t orders

# Repack entire database
pg_repack -h localhost -U postgres -d mydb

# Repack only indexes (faster, lower risk)
pg_repack -h localhost -U postgres -d mydb -t orders --only-indexes

Autovacuum Tuning

Autovacuum triggers when: n_dead_tup > autovacuum_vacuum_threshold + autovacuum_vacuum_scale_factor * n_live_tup
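
A sketch that computes each table's current trigger point from the global settings (per-table overrides are ignored here for brevity):

-- Dead tuples vs. each table's autovacuum trigger threshold
SELECT relname,
       n_dead_tup,
       current_setting('autovacuum_vacuum_threshold')::int
         + current_setting('autovacuum_vacuum_scale_factor')::float * n_live_tup AS trigger_at
FROM pg_stat_user_tables
ORDER BY n_dead_tup DESC;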

# postgresql.conf global settings
autovacuum = on                              # never disable
autovacuum_max_workers = 5                   # default 3; increase for many tables
autovacuum_vacuum_threshold = 50            # min dead tuples before trigger
autovacuum_vacuum_scale_factor = 0.02       # 2% of table (default 0.2 = 20%)
autovacuum_analyze_threshold = 50
autovacuum_analyze_scale_factor = 0.01      # 1% (default 0.1 = 10%)
autovacuum_vacuum_cost_delay = 2ms          # throttle I/O (default 2ms since PG12)
autovacuum_vacuum_cost_limit = 200          # I/O budget per delay cycle
autovacuum_naptime = 30s                    # min delay between runs on a given database (default 1min)

-- Per-table autovacuum override (large tables need lower scale factor)
-- For a 100M row table, 20% = 20M dead tuples before vacuum -- too late
ALTER TABLE orders SET (
    autovacuum_vacuum_scale_factor = 0.01,   -- 1% instead of 20%
    autovacuum_vacuum_threshold = 1000,
    autovacuum_analyze_scale_factor = 0.005,
    autovacuum_vacuum_cost_delay = 10        -- ms; slow down to reduce I/O impact
);

-- High-churn tables (logs, queues): more aggressive
ALTER TABLE job_queue SET (
    autovacuum_vacuum_scale_factor = 0.001,
    autovacuum_vacuum_threshold = 100
);
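
To confirm the overrides took effect, per-table storage options are visible in pg_class.reloptions:

-- Show tables with autovacuum overrides
SELECT relname, reloptions
FROM pg_class
WHERE reloptions::text LIKE '%autovacuum%';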

Transaction ID Wraparound

PostgreSQL uses 32-bit transaction IDs (XIDs), so the usable XID space is about 2 billion. When a table's oldest XID age passes autovacuum_freeze_max_age (default 200M), an aggressive anti-wraparound autovacuum is forced; if the age keeps climbing until only a few million XIDs remain, the server refuses new write transactions until a freeze completes.

-- Monitor XID age across all databases (run as superuser)
SELECT datname,
       age(datfrozenxid) AS xid_age,
       2000000000 - age(datfrozenxid) AS remaining_xids,
       round(100.0 * age(datfrozenxid) / 2000000000, 2) AS pct_used
FROM pg_database
ORDER BY age(datfrozenxid) DESC;

-- Monitor per-table XID age (find tables that need freezing)
SELECT relname,
       age(relfrozenxid) AS xid_age,
       pg_size_pretty(pg_total_relation_size(oid)) AS size
FROM pg_class
WHERE relkind = 'r'
ORDER BY age(relfrozenxid) DESC
LIMIT 20;

-- Emergency response when approaching wraparound:
-- 1. Check if autovacuum is running: SELECT * FROM pg_stat_activity WHERE query LIKE 'autovacuum%';
-- 2. Manual aggressive freeze:
VACUUM FREEZE orders;        -- force freeze all tuples in table
-- 3. For cluster-wide freeze:
-- vacuumdb -a -F -j 4       -- freeze all databases, 4 parallel workers

-- Relevant postgresql.conf settings
vacuum_freeze_min_age = 50000000        -- freeze tuples older than this (50M XIDs)
vacuum_freeze_table_age = 150000000     -- force full table scan at this age
autovacuum_freeze_max_age = 200000000   -- emergency autovacuum triggered here

Dead Tuple Accumulation

Long-running transactions and idle-in-transaction sessions prevent VACUUM from removing dead tuples, because their old snapshots may still need to see pre-update row versions.

-- Find sessions holding old snapshots (preventing dead tuple cleanup)
SELECT pid, usename, application_name, state,
       now() - xact_start AS xact_age,
       now() - query_start AS query_age,
       left(query, 80) AS current_query
FROM pg_stat_activity
WHERE state != 'idle'
  AND xact_start < now() - interval '5 minutes'
ORDER BY xact_start;

-- Find the oldest active transaction (this limits vacuum)
SELECT min(xact_start), max(now() - xact_start) AS max_age
FROM pg_stat_activity
WHERE xact_start IS NOT NULL;

-- Check if replication slots are holding back WAL (another source of bloat)
SELECT slot_name, active, pg_size_pretty(
    pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)
) AS lag
FROM pg_replication_slots;

-- Kill long-running idle-in-transaction sessions (use carefully)
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE state = 'idle in transaction'
  AND now() - xact_start > interval '1 hour';

-- Prevent accumulation: set statement/transaction timeouts
-- postgresql.conf or ALTER ROLE:
-- idle_in_transaction_session_timeout = '10min'
-- statement_timeout = '30s'

Monitoring

pg_stat_activity

-- Connection overview by state
SELECT state, count(*), max(now() - state_change) AS max_time_in_state
FROM pg_stat_activity
GROUP BY state
ORDER BY count DESC;

-- Long-running queries (over 30 seconds)
SELECT pid, usename, application_name, client_addr, state,
       now() - query_start AS duration,
       wait_event_type, wait_event,
       left(query, 120) AS query
FROM pg_stat_activity
WHERE state != 'idle'
  AND query_start < now() - interval '30 seconds'
ORDER BY query_start;

-- Wait events (what are connections waiting for)
SELECT wait_event_type, wait_event, count(*)
FROM pg_stat_activity
WHERE state != 'idle'
GROUP BY wait_event_type, wait_event
ORDER BY count DESC;
-- Common wait events:
-- Lock/relation = waiting for table lock
-- Client/ClientRead = waiting for client to send data
-- IO/DataFileRead = reading from disk
-- IPC/BgWorkerShutdown = parallel query coordination

pg_stat_user_tables

-- Tables with high sequential scan rates (missing indexes?)
SELECT relname,
       seq_scan,
       idx_scan,
       round(100.0 * idx_scan / nullif(seq_scan + idx_scan, 0), 2) AS idx_pct,
       n_live_tup,
       n_dead_tup,
       last_vacuum::date,
       last_autovacuum::date,
       last_analyze::date
FROM pg_stat_user_tables
WHERE seq_scan > 100
ORDER BY seq_scan DESC;

-- Tables most in need of VACUUM
SELECT relname,
       n_dead_tup,
       n_live_tup,
       round(100.0 * n_dead_tup / nullif(n_live_tup + n_dead_tup, 0), 2) AS dead_pct,
       last_autovacuum,
       last_vacuum
FROM pg_stat_user_tables
ORDER BY n_dead_tup DESC
LIMIT 20;

Unused Index Detection

-- Indexes that are never used (candidates for removal)
SELECT schemaname, relname AS table, indexrelname AS index,
       pg_size_pretty(pg_relation_size(i.indexrelid)) AS index_size,
       idx_scan AS scans
FROM pg_stat_user_indexes ui
JOIN pg_index i ON i.indexrelid = ui.indexrelid
WHERE idx_scan = 0
  AND NOT indisunique           -- keep unique constraints
  AND NOT indisprimary          -- keep primary keys
ORDER BY pg_relation_size(i.indexrelid) DESC;

-- Indexes with low usage relative to writes (more overhead than benefit)
SELECT relname AS table,
       indexrelname AS index,
       idx_scan AS reads,
       pg_stat_get_tuples_inserted(relid) + pg_stat_get_tuples_updated(relid)
           + pg_stat_get_tuples_deleted(relid) AS writes,
       pg_size_pretty(pg_relation_size(indexrelid)) AS size
FROM pg_stat_user_indexes
WHERE idx_scan < 100
ORDER BY pg_relation_size(indexrelid) DESC;

-- Note: reset stats after index creation or major data loads
-- SELECT pg_stat_reset();  -- resets ALL stats for this database

pg_stat_bgwriter

-- Checkpoint health and buffer writer activity (PG16 and earlier;
-- PG17 moves checkpoint stats to pg_stat_checkpointer and backend writes to pg_stat_io)
SELECT checkpoints_timed,
       checkpoints_req,                          -- forced (bad: I/O spike risk)
       round(100.0 * checkpoints_req /
           nullif(checkpoints_timed + checkpoints_req, 0), 2) AS forced_pct,
       buffers_checkpoint,                        -- written at checkpoint
       buffers_clean,                             -- written by bgwriter
       maxwritten_clean,                          -- bgwriter hit write limit (increase bgwriter_lru_maxpages)
       buffers_backend,                           -- written by backend directly (bad)
       buffers_backend_fsync,                     -- backend had to fsync (very bad)
       buffers_alloc,                             -- new buffers allocated
       stats_reset::date
FROM pg_stat_bgwriter;

-- High checkpoints_req: increase max_wal_size (and/or checkpoint_timeout)
-- High buffers_backend: shared_buffers too small or bgwriter falling behind
-- Ideal: checkpoints_req / total < 10%, buffers_backend / total < 5%

-- postgresql.conf tuning:
-- checkpoint_completion_target = 0.9   -- spread checkpoint I/O over 90% of interval
-- max_wal_size = 4GB                   -- larger = fewer forced checkpoints
-- checkpoint_timeout = 10min           -- default 5min; longer = fewer timed checkpoints

Lock Contention and Deadlocks

-- Active locks and what is blocking what
SELECT blocked.pid AS blocked_pid,
       blocked.usename AS blocked_user,
       blocking.pid AS blocking_pid,
       blocking.usename AS blocking_user,
       blocked_activity.wait_event,
       blocked_activity.query AS blocked_query,
       blocking_activity.query AS blocking_query
FROM pg_locks blocked
JOIN pg_stat_activity blocked_activity ON blocked_activity.pid = blocked.pid
JOIN pg_locks blocking ON blocking.locktype = blocked.locktype
    AND blocking.database IS NOT DISTINCT FROM blocked.database
    AND blocking.relation IS NOT DISTINCT FROM blocked.relation
    AND blocking.page IS NOT DISTINCT FROM blocked.page
    AND blocking.tuple IS NOT DISTINCT FROM blocked.tuple
    AND blocking.classid IS NOT DISTINCT FROM blocked.classid
    AND blocking.objid IS NOT DISTINCT FROM blocked.objid
    AND blocking.objsubid IS NOT DISTINCT FROM blocked.objsubid
    AND blocking.pid != blocked.pid
    AND blocking.granted
JOIN pg_stat_activity blocking_activity ON blocking_activity.pid = blocking.pid
WHERE NOT blocked.granted;
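
Since PG9.6, pg_blocking_pids() does the lock matching internally; a much shorter equivalent:

-- Sessions that are blocked, and the PIDs blocking them
SELECT pid,
       pg_blocking_pids(pid) AS blocked_by,
       state,
       left(query, 80) AS query
FROM pg_stat_activity
WHERE cardinality(pg_blocking_pids(pid)) > 0;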

-- Lock types by table (what mode of locks are held)
SELECT relname, mode, count(*)
FROM pg_locks l
JOIN pg_class c ON c.oid = l.relation
WHERE l.granted
GROUP BY relname, mode
ORDER BY relname, mode;

-- Deadlock investigation: enable logging in postgresql.conf
-- log_lock_waits = on          -- log waits over deadlock_timeout
-- deadlock_timeout = 1s        -- time before deadlock check runs
-- log_min_duration_statement = 5000  -- log queries taking over 5s

Cache Hit Ratio

-- Database-level buffer cache hit ratio (target: > 99% for OLTP)
SELECT datname,
       blks_hit,
       blks_read,
       round(100.0 * blks_hit / nullif(blks_hit + blks_read, 0), 2) AS hit_ratio
FROM pg_stat_database
WHERE datname = current_database();

-- Table-level cache hit ratio
SELECT relname,
       heap_blks_hit,
       heap_blks_read,
       round(100.0 * heap_blks_hit / nullif(heap_blks_hit + heap_blks_read, 0), 2) AS hit_ratio
FROM pg_statio_user_tables
ORDER BY heap_blks_read DESC;

-- Index cache hit ratio
SELECT relname, indexrelname,
       idx_blks_hit,
       idx_blks_read,
       round(100.0 * idx_blks_hit / nullif(idx_blks_hit + idx_blks_read, 0), 2) AS hit_ratio
FROM pg_statio_user_indexes
ORDER BY idx_blks_read DESC;

Table and Index Bloat Estimation

-- Table bloat estimate (uses pgstattuple extension if available, else heuristic)
-- Heuristic approach (no extension required):
SELECT
    schemaname,
    relname AS table,
    pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
    pg_size_pretty(pg_relation_size(relid)) AS table_size,
    round(100.0 * n_dead_tup / nullif(n_live_tup + n_dead_tup, 0), 2) AS dead_tup_pct
FROM pg_stat_user_tables
ORDER BY pg_total_relation_size(relid) DESC;

-- Using pgstattuple for precise bloat (requires extension, scans full table)
CREATE EXTENSION IF NOT EXISTS pgstattuple;
SELECT * FROM pgstattuple('orders');
-- dead_tuple_percent > 20% warrants VACUUM or pg_repack

-- Index bloat via pgstatindex
SELECT * FROM pgstatindex('orders_pkey');
-- avg_leaf_density < 70% suggests bloat; REINDEX or pg_repack --only-indexes

-- Quick bloat estimate without an extension (dead-tuple heuristic):
SELECT
    relname,
    pg_size_pretty(real_size) AS real_size,
    pg_size_pretty(bloat_size::bigint) AS bloat_size,
    round(bloat_ratio::numeric, 2) AS bloat_ratio
FROM (
    SELECT relname,
           pg_total_relation_size(relid) AS real_size,
           pg_relation_size(relid) * (n_dead_tup::float / nullif(n_live_tup + n_dead_tup, 0)) AS bloat_size,
           100.0 * n_dead_tup / nullif(n_live_tup + n_dead_tup, 0) AS bloat_ratio
    FROM pg_stat_user_tables
) t
WHERE bloat_ratio > 10
ORDER BY bloat_size DESC;

Connection Pooling

pgBouncer Modes

Session mode: client holds server connection for entire session duration. Same behavior as direct connection. Use for: apps using session-level features (temp tables, prepared statements with protocol-level binding, SET LOCAL, advisory locks).

Transaction mode: server connection returned to the pool after each transaction. Much higher multiplexing. Use for: most web applications with short transactions. Limitations: session-level state does not survive (session-level SET, LISTEN/NOTIFY, named prepared statements, temp tables, advisory locks).

Statement mode: connection returned after each statement. Rarely used. Limitation: no multi-statement transactions. Useful only for simple read-only workloads.

; pgbouncer.ini
[databases]
mydb = host=localhost port=5432 dbname=mydb

[pgbouncer]
listen_port = 6432
listen_addr = *
auth_type = scram-sha-256
auth_file = /etc/pgbouncer/userlist.txt

pool_mode = transaction          ; session|transaction|statement
max_client_conn = 1000           ; max connections from clients
default_pool_size = 25           ; server connections per database/user pair
min_pool_size = 5                ; keep this many open even when idle
reserve_pool_size = 5            ; extra server connections when clients wait past reserve_pool_timeout
reserve_pool_timeout = 5         ; seconds to wait before using reserve pool

server_reset_query = DISCARD ALL ; run after each session return (session mode)
server_check_query = SELECT 1    ; health check query
server_idle_timeout = 600        ; close idle server connections after 10min
client_idle_timeout = 0          ; 0 = never close idle clients (set in app instead)

; Logging
log_connections = 0              ; reduce noise in production
log_disconnections = 0
log_pooler_errors = 1
stats_period = 60                ; log stats every 60 seconds

# pgBouncer monitoring via admin console
psql -h localhost -p 6432 -U pgbouncer pgbouncer

SHOW POOLS;      -- pool stats: cl_active, cl_waiting, sv_active, sv_idle
SHOW CLIENTS;    -- connected clients
SHOW SERVERS;    -- server connections
SHOW STATS;      -- request rates, query times
SHOW CONFIG;     -- current configuration

PAUSE mydb;      -- pause pool (for maintenance)
RESUME mydb;     -- resume pool
RELOAD;          -- reload config without restart

Application-Level Pooling (SQLAlchemy)

from sqlalchemy import create_engine

engine = create_engine(
    "postgresql+psycopg2://user:pass@localhost:5432/mydb",
    pool_size=10,           # persistent connections in pool
    max_overflow=20,        # extra connections beyond pool_size (temporary)
    pool_timeout=30,        # seconds to wait for available connection
    pool_recycle=1800,      # recycle connections after 30min (avoid stale connections)
    pool_pre_ping=True,     # test connection before using from pool
)

# With pgBouncer in transaction mode, use NullPool or StaticPool
# (pgBouncer handles pooling, app should not pool on top of pooler)
from sqlalchemy.pool import NullPool
engine = create_engine(
    "postgresql+psycopg2://user:pass@pgbouncer:6432/mydb",
    poolclass=NullPool       # no application-level pooling
)
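
Usage is the same with either engine; a quick sketch:

# Connections are checked out on demand and returned to the pool on context exit
from sqlalchemy import text

with engine.connect() as conn:
    version = conn.execute(text("SELECT version()")).scalar()
    print(version)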

Connection Sizing Guidelines

The classic formula: connections = (core_count * 2) + effective_spindle_count

For SSD storage (spindles = 1), a 16-core server: (16 * 2) + 1 = 33 PostgreSQL connections.

-- Check current connection usage
SELECT count(*) AS total,
       count(*) FILTER (WHERE state = 'active') AS active,
       count(*) FILTER (WHERE state = 'idle') AS idle,
       count(*) FILTER (WHERE state = 'idle in transaction') AS idle_txn,
       max_conn
FROM pg_stat_activity
CROSS JOIN (SELECT setting::int AS max_conn FROM pg_settings WHERE name = 'max_connections') s
GROUP BY max_conn;

-- Connections by application
SELECT application_name, count(*), max(now() - state_change) AS longest_idle
FROM pg_stat_activity
GROUP BY application_name
ORDER BY count DESC;

; postgresql.conf connection settings
max_connections = 100            ; total connections (including superuser)
superuser_reserved_connections = 3  ; reserved for superuser access

; Memory implication: each connection uses ~5-10MB of RAM
; At max_connections = 200: budget 1-2GB RAM for connection overhead alone
; Use pgBouncer to keep max_connections low (50-100) and serve thousands of clients

; Recommended approach for most web apps:
; App -> pgBouncer (transaction mode, max_client_conn=1000, pool_size=25)
;     -> PostgreSQL (max_connections=50)

Monitoring Pool Health

-- Alert conditions to monitor:
-- 1. Connections near max_connections
SELECT count(*) * 100.0 / current_setting('max_connections')::int AS pct_used
FROM pg_stat_activity;

-- 2. Idle-in-transaction accumulating (connection leak or slow clients)
SELECT count(*)
FROM pg_stat_activity
WHERE state = 'idle in transaction'
  AND now() - state_change > interval '5 minutes';

-- 3. Connection wait (pgBouncer cl_waiting > 0 sustained = under-provisioned pool)

-- Set timeouts to prevent connection leaks:
-- ALTER ROLE myapp SET idle_in_transaction_session_timeout = '5min';
-- ALTER ROLE myapp SET statement_timeout = '30s';