# Load Testing

Comprehensive guide to load testing tools, methodology, and CI integration.

## k6 (Grafana)

### Script Structure

```javascript
// k6 script: load-test.js
import http from 'k6/http';
import { check, sleep, group } from 'k6';
import { Rate, Trend, Counter } from 'k6/metrics';

// Custom metrics, created once at module level and fed from the default function.
// errorRate: receives one boolean per sample; gated by the `errors` threshold in options.
const errorRate = new Rate('errors');
// responseTime: receives request durations taken from `timings.duration`.
const responseTime = new Trend('response_time');
// requestCount: incremented by 1 for each request that is recorded.
const requestCount = new Counter('total_requests');

// Load profile: climb to 50 VUs, hold, then drain back to zero.
const loadStages = [
  { duration: '2m', target: 50 }, // Ramp up
  { duration: '5m', target: 50 }, // Sustain
  { duration: '2m', target: 0 }, // Ramp down
];

// Pass/fail criteria evaluated by k6 for the run.
const passFailCriteria = {
  http_req_duration: ['p(95)<500', 'p(99)<1000'], // ms
  http_req_failed: ['rate<0.01'], // <1% error rate
  errors: ['rate<0.05'], // Custom metric
};

// Test configuration (scenario-based)
export const options = {
  scenarios: {
    load_test: {
      executor: 'ramping-vus',
      startVUs: 0,
      stages: loadStages,
      gracefulRampDown: '30s',
    },
  },
  thresholds: passFailCriteria,
};

// Setup: runs once before the test. The returned object is handed to the
// default function and to teardown() as `data`.
// Throws if the login does not succeed, so the run stops here instead of
// sending every request with an unusable "Bearer undefined" header.
export function setup() {
  const loginRes = http.post('https://api.example.com/login', {
    username: 'testuser',
    password: 'testpass',
  });

  // Check the status before parsing: an error page is usually not JSON.
  if (loginRes.status < 200 || loginRes.status >= 300) {
    throw new Error(`login failed: HTTP ${loginRes.status}`);
  }

  const token = loginRes.json('token');
  if (!token) {
    throw new Error('login failed: response did not contain a token');
  }

  return { token };
}

// Default function: runs once per VU iteration.
// `data` is the object returned by setup(); only `data.token` is read here.
export default function (data) {
  const authHeader = `Bearer ${data.token}`;

  group('API endpoints', function () {
    // GET request
    const listRes = http.get('https://api.example.com/items', {
      headers: { Authorization: authHeader },
    });

    check(listRes, {
      'status is 200': (r) => r.status === 200,
      'response time < 500ms': (r) => r.timings.duration < 500,
      // r.json() throws on a non-JSON body (e.g. a proxy error page) and
      // returns undefined when the selector is missing. Either would raise
      // inside the check and abort the rest of the iteration, so report a
      // failed check instead.
      'has items': (r) => {
        try {
          const items = r.json('items');
          return Array.isArray(items) && items.length > 0;
        } catch (err) {
          return false;
        }
      },
    });

    errorRate.add(listRes.status !== 200);
    responseTime.add(listRes.timings.duration);
    requestCount.add(1);

    // POST request
    const createRes = http.post(
      'https://api.example.com/items',
      JSON.stringify({ name: 'test item', value: Math.random() }),
      {
        headers: {
          'Content-Type': 'application/json',
          Authorization: authHeader,
        },
      }
    );

    check(createRes, {
      'created successfully': (r) => r.status === 201,
    });

    // Record the write as well, so the custom metrics describe every
    // request this iteration sends rather than only the GET.
    errorRate.add(createRes.status !== 201);
    responseTime.add(createRes.timings.duration);
    requestCount.add(1);
  });

  // Think time before the next iteration. Kept outside the group so the
  // pause is not counted in the group's duration.
  sleep(1);
}

// Teardown: runs once after the test.
// Sends an authenticated, body-less POST to the cleanup endpoint using the
// token carried in `data`.
export function teardown(data) {
  const { token } = data;
  const params = { headers: { Authorization: `Bearer ${token}` } };
  http.post('https://api.example.com/cleanup', null, params);
}
```

### k6 Executors

```
Executor selection:
│
├─ shared-iterations
│  └─ Fixed total iterations split across VUs
│     Use: "Run exactly N requests total"
│
├─ per-vu-iterations
│  └─ Each VU runs exactly N iterations
│     Use: "Each user does N actions"
│
├─ constant-vus
│  └─ Fixed number of VUs for a duration
│     Use: "Sustain N concurrent users"
│
├─ ramping-vus
│  └─ VUs ramp up/down in stages
│     Use: Standard load test pattern
│
├─ constant-arrival-rate
│  └─ Fixed iteration rate regardless of response time (open model)
│     Use: "Maintain N iterations/s" (≈ N RPS with one request per iteration; most realistic)
│
├─ ramping-arrival-rate
│  └─ Iteration rate ramps up/down
│     Use: "Find breaking point at increasing RPS"
│
└─ externally-controlled
   └─ VUs controlled via k6 REST API
      Use: Dynamic load adjustment during test
```

### k6 CLI Commands

```bash
# Run a test
k6 run script.js

# Run with overrides
k6 run --vus 50 --duration 30s script.js
k6 run --env BASE_URL=https://staging.example.com script.js

# Output to various formats
k6 run --out json=results.json script.js
k6 run --out csv=results.csv script.js
k6 run --out influxdb=http://localhost:8086/k6 script.js

# Cloud execution (requires k6 cloud account)
k6 cloud script.js

# Convert HAR to k6 script
# (`k6 convert` has been removed from current k6 releases; use the standalone har-to-k6 tool)
npx har-to-k6 recording.har -o generated-script.js

# Inspect script options without running
k6 inspect script.js
```

### k6 Browser Testing

```javascript
import { browser } from 'k6/browser';

// One Chromium-backed virtual user, held for 30 seconds.
const browserScenario = {
  executor: 'constant-vus',
  vus: 1,
  duration: '30s',
  options: {
    browser: { type: 'chromium' },
  },
};

export const options = {
  scenarios: { browser: browserScenario },
};

// Browser iteration: log in through the UI, then read Largest Contentful Paint.
// The page is always closed, even when a step fails.
export default async function () {
  const page = await browser.newPage();
  try {
    await page.goto('https://example.com');
    await page.locator('input[name="username"]').fill('testuser');
    await page.locator('input[name="password"]').fill('testpass');

    // Start waiting for the navigation BEFORE clicking. Clicking first and
    // waiting afterwards is a race: a fast navigation can finish before the
    // wait is registered, and the wait then times out.
    await Promise.all([
      page.waitForNavigation(),
      page.locator('button[type="submit"]').click(),
    ]);

    // Measure Web Vitals
    const lcp = await page.evaluate(() => {
      return new Promise((resolve) => {
        const observer = new PerformanceObserver((list) => {
          const entries = list.getEntries();
          if (entries.length > 0) {
            observer.disconnect();
            resolve(entries[entries.length - 1].startTime);
          }
        });
        observer.observe({ type: 'largest-contentful-paint', buffered: true });

        // Fallback: if the page never reports an LCP entry the promise would
        // stay pending and stall the iteration, so give up after 5 seconds.
        setTimeout(() => {
          observer.disconnect();
          resolve(null);
        }, 5000);
      });
    });

    if (lcp === null) {
      console.log('LCP: not reported');
    } else {
      console.log(`LCP: ${lcp}ms`);
    }
  } finally {
    await page.close();
  }
}
```

## Artillery

### YAML Configuration

```yaml
# artillery-config.yml
config:
  target: "https://api.example.com"
  phases:
    - duration: 120    # 2 minutes
      arrivalRate: 10  # 10 new users per second
      name: "Warm-up"
    - duration: 300    # 5 minutes
      arrivalRate: 50  # 50 new users per second
      name: "Sustained load"
    - duration: 60
      arrivalRate: 100
      name: "Spike"

  # Plugins
  plugins:
    expect: {}        # Response validation
    metrics-by-endpoint: {} # Per-endpoint metrics

  # Default headers
  defaults:
    headers:
      Content-Type: "application/json"

  # Variables
  variables:
    baseUrl: "https://api.example.com"

  # Connection settings
  http:
    timeout: 10        # seconds
    pool: 100          # connection pool size

scenarios:
  - name: "Browse and purchase"
    weight: 70         # 70% of traffic
    flow:
      - get:
          url: "/products"
          expect:
            - statusCode: 200
            - hasProperty: "items"
          capture:
            - json: "$.items[0].id"
              as: "productId"
      - think: 3       # 3 second pause
      - get:
          url: "/products/{{ productId }}"
          expect:
            - statusCode: 200
      - post:
          url: "/cart"
          json:
            productId: "{{ productId }}"
            quantity: 1
          expect:
            - statusCode: 201

  - name: "Search"
    weight: 30         # 30% of traffic
    flow:
      - get:
          url: "/search?q={{ $randomString() }}"
          expect:
            - statusCode: 200
```

### Artillery CLI

```bash
# Run load test
artillery run artillery-config.yml

# Quick test (no config needed)
artillery quick --count 100 --num 10 https://api.example.com

# Generate HTML report
artillery run --output report.json artillery-config.yml
artillery report report.json

# Run with environment-specific config
artillery run -e staging artillery-config.yml

# Run with Playwright (browser scenarios)
# Playwright is an engine, not a platform: enable it in the script with
# `config.engines.playwright: {}` and `engine: playwright` on the scenario.
# (--platform selects where the test runs, e.g. aws:lambda / aws:fargate.)
artillery run artillery-browser.yml
```

## vegeta (Go)

### Attack and Report

```bash
# Basic attack
echo "GET http://localhost:8080/" | vegeta attack -duration=30s -rate=50/s | vegeta report

# Multiple endpoints from file
# targets.txt:
# GET http://localhost:8080/api/users
# GET http://localhost:8080/api/products
# POST http://localhost:8080/api/orders
# Content-Type: application/json
# @body.json
vegeta attack -targets=targets.txt -duration=60s -rate=100/s | vegeta report

# Custom headers
echo "GET http://localhost:8080/api/data" | \
  vegeta attack -header "Authorization: Bearer TOKEN" -duration=30s | \
  vegeta report

# Output formats
echo "GET http://localhost:8080/" | vegeta attack -duration=30s | vegeta report -type=text
echo "GET http://localhost:8080/" | vegeta attack -duration=30s | vegeta report -type=json
# Quote the histogram spec: unquoted [...] is a shell glob pattern (zsh aborts with "no matches found")
echo "GET http://localhost:8080/" | vegeta attack -duration=30s | vegeta report -type='hist[0,50ms,100ms,200ms,500ms,1s]'

# Generate latency-over-time plot (HTML); for an HDR histogram use `vegeta report -type=hdrplot`
echo "GET http://localhost:8080/" | vegeta attack -duration=60s | vegeta plot > plot.html

# Encode results for later analysis
echo "GET http://localhost:8080/" | vegeta attack -duration=60s | vegeta encode > results.json

# Constant rate vs max rate
echo "GET http://localhost:8080/" | vegeta attack -rate=0 -max-workers=100 -duration=30s | vegeta report
# -rate=0 means "as fast as possible" with max-workers limit
```

### vegeta Report Interpretation

```
Requests      [total, rate, throughput]  3000, 100.03, 99.87
Duration      [total, attack, wait]     30.04s, 29.99s, 49.54ms
Latencies     [min, mean, 50, 90, 95, 99, max]  12.5ms, 48.2ms, 42.1ms, 85.3ms, 120.5ms, 250.1ms, 1.2s
Bytes In      [total, mean]             1500000, 500.00
Bytes Out     [total, mean]             0, 0.00
Success       [ratio]                   99.5%
Status Codes  [code:count]              200:2985  500:15

Key metrics:
- p50 (median): typical user experience
- p95: 95% of users experience this or better
- p99: tail latency (worst 1%)
- Success ratio: anything below 99% needs investigation
- Throughput vs rate: throughput counts only successful responses, so throughput well below rate means requests are failing or the server can't keep up
```

## wrk / wrk2

### wrk: Lightweight HTTP Benchmarking

```bash
# Basic usage
wrk -t4 -c100 -d30s http://localhost:8080/
# -t4: 4 threads
# -c100: 100 connections
# -d30s: 30 second duration

# With Lua script
wrk -t4 -c100 -d30s -s script.lua http://localhost:8080/

# wrk2 (constant throughput mode)
wrk2 -t4 -c100 -d30s -R2000 http://localhost:8080/
# -R2000: target 2000 requests/second
```

### wrk Lua Scripts

```lua
-- post-request.lua: POST with JSON body
-- Sets the request method, body and Content-Type header on the global `wrk` table.
wrk.method = "POST"
wrk.body   = '{"username":"test","password":"test"}'
wrk.headers["Content-Type"] = "application/json"

-- dynamic-request.lua: different paths per request
-- `counter` is a global that advances on every call; the path cycles through
-- /api/items/0 .. /api/items/999.
counter = 0
function request()
  counter = counter + 1
  return wrk.format("GET", "/api/items/" .. (counter % 1000))
end

-- response.lua: validate responses
-- Any status other than 200 stops the thread that received it.
function response(status, headers, body)
  if status == 200 then
    return
  end
  wrk.thread:stop()
end

-- report.lua: custom reporting
-- Prints one line per percentile from the `latency` object passed to done().
done = function(summary, latency, requests)
  io.write("Latency distribution:\n")
  -- ipairs walks the list in index order; pairs gives no ordering guarantee.
  for _, p in ipairs({ 50, 90, 95, 99, 99.9 }) do
    -- `local` keeps n from leaking into the global environment.
    local n = latency:percentile(p)
    -- n / 1000 converts to the milliseconds printed below
    -- (assumes percentile() returns microseconds, TODO confirm against wrk docs).
    io.write(string.format("%g%%\t%d ms\n", p, n / 1000))
  end
end
```

## Locust (Python)

### User Classes and Tasks

```python
# locustfile.py
from locust import HttpUser, task, between, events
from locust import LoadTestShape
import json

class WebsiteUser(HttpUser):
    """Regular site visitor: logs in once, then browses, searches and creates items."""

    # Relative share of spawned users. With AdminUser at weight 1 this gives
    # roughly 10 regular users per admin, the ratio AdminUser's comment describes.
    weight = 10

    # Wait between requests (simulates think time)
    wait_time = between(1, 5)

    # Run once per user on start
    def on_start(self):
        response = self.client.post("/login", json={
            "username": "testuser",
            "password": "testpass"
        })
        # Surface a failed login as an HTTP error here rather than as an
        # opaque KeyError / JSON decode error on the next line.
        response.raise_for_status()
        self.token = response.json()["token"]
        self.client.headers.update({
            "Authorization": f"Bearer {self.token}"
        })

    @task(3)  # Weight: 3x more likely than weight-1 tasks
    def browse_items(self):
        # catch_response=True lets the task mark a 200 with an empty list as a failure.
        with self.client.get("/api/items", catch_response=True) as response:
            if response.status_code == 200:
                items = response.json()["items"]
                if len(items) == 0:
                    response.failure("No items returned")
            else:
                response.failure(f"Status {response.status_code}")

    @task(1)
    def create_item(self):
        self.client.post("/api/items", json={
            "name": f"item-{self.environment.runner.user_count}",
            "value": 42
        })

    @task(2)
    def search(self):
        self.client.get("/api/search?q=test")

    # Run once per user on stop
    def on_stop(self):
        self.client.post("/logout")


class AdminUser(HttpUser):
    """Second user class: polls the admin dashboard with long pauses."""
    weight = 1  # 1 admin for every 10 regular users (if WebsiteUser weight=10)
    wait_time = between(5, 15)

    @task(1)
    def check_dashboard(self):
        self.client.get("/admin/dashboard")


# Custom load shape
class StagesShape(LoadTestShape):
    """Ramp up, sustain, spike, recover.

    Each stage's "duration" is the run time (seconds) at which that stage ends.
    """
    stages = [
        {"duration": 60,  "users": 10,  "spawn_rate": 2},
        {"duration": 300, "users": 50,  "spawn_rate": 5},
        {"duration": 360, "users": 200, "spawn_rate": 50},  # Spike
        {"duration": 420, "users": 50,  "spawn_rate": 10},  # Recover
        {"duration": 480, "users": 0,   "spawn_rate": 10},  # Ramp down
    ]

    def tick(self):
        elapsed = self.get_run_time()
        # First stage whose end time has not been reached yet, if any.
        current = next(
            (stage for stage in self.stages if elapsed < stage["duration"]),
            None,
        )
        if current is None:
            # Past the last stage: tick() returns None.
            return None
        return (current["users"], current["spawn_rate"])
```

### Locust CLI

```bash
# Run with web UI (default port 8089)
locust -f locustfile.py --host https://api.example.com

# Headless mode
locust -f locustfile.py --host https://api.example.com \
  --headless -u 100 -r 10 --run-time 5m
# -u: total users, -r: spawn rate per second

# Distributed mode
# Master:
locust -f locustfile.py --master
# Workers (on each worker machine):
locust -f locustfile.py --worker --master-host=MASTER_IP

# CSV output
locust -f locustfile.py --headless -u 50 -r 5 --run-time 5m \
  --csv=results --csv-full-history

# HTML report
locust -f locustfile.py --headless -u 50 -r 5 --run-time 5m \
  --html=report.html
```

## autocannon (Node.js)

### CLI and Programmatic Usage

```bash
# Basic usage
autocannon -c 100 -d 30 http://localhost:3000
# -c: connections, -d: duration in seconds

# With pipelining (multiple requests per connection)
autocannon -c 100 -p 10 -d 30 http://localhost:3000

# POST with body
autocannon -c 50 -d 30 -m POST \
  -H "Content-Type=application/json" \
  -b '{"key":"value"}' \
  http://localhost:3000/api/data

# HAR file input
autocannon -c 100 -d 30 --har requests.har http://localhost:3000
```

```javascript
// Programmatic usage
const autocannon = require('autocannon');

// `await` is not allowed at the top level of a CommonJS module (which
// `require` implies), so the run lives inside an async function.
async function main() {
  const result = await autocannon({
    url: 'http://localhost:3000',
    connections: 100,
    duration: 30,
    pipelining: 10,
    headers: {
      'Authorization': 'Bearer TOKEN',
      // The POST below sends a JSON body, so declare its type.
      'Content-Type': 'application/json',
    },
    requests: [
      { method: 'GET', path: '/api/items' },
      { method: 'POST', path: '/api/items', body: JSON.stringify({ name: 'test' }) },
    ],
  });

  console.log('Avg latency:', result.latency.average, 'ms');
  console.log('Req/sec:', result.requests.average);
  console.log('Throughput:', result.throughput.average, 'bytes/sec');
}

// Report a failed run and exit non-zero instead of leaving an unhandled rejection.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
```

## Load Testing Methodology

### Test Planning

```
Before running load tests:
│
├─ Define objectives
│  ├─ What SLOs must be met? (p95 < 200ms, 99.9% availability)
│  ├─ What is expected peak traffic? (from analytics/projections)
│  └─ What scenarios matter? (browse, search, checkout, API calls)
│
├─ Prepare environment
│  ├─ Use production-like infrastructure (same specs, same config)
│  ├─ Use realistic data volumes (not empty database)
│  ├─ Isolate from production traffic
│  └─ Ensure monitoring is in place (APM, metrics, logs)
│
├─ Create realistic scenarios
│  ├─ Model real user behavior (browse → search → add to cart → checkout)
│  ├─ Include think time between actions
│  ├─ Mix of read and write operations
│  ├─ Vary request payloads
│  └─ Include authentication flows
│
└─ Establish baselines
   ├─ Run smoke test first (verify test works at low load)
   ├─ Record baseline metrics at known-good load
   └─ Compare subsequent tests against baseline
```

### Test Execution Patterns

```
Ramp-Up Test:
Users ▲
  100 │          ┌──────────────────┐
      │        ╱│                  │╲
   50 │      ╱  │     Sustain      │  ╲
      │    ╱    │                  │    ╲
    0 │──╱─────┼──────────────────┼─────╲──
      └────────────────────────────────────→ Time
      0    2m       5m             7m   9m

Spike Test:
Users ▲
  500 │         ╱╲
      │        ╱  ╲
  100 │───────╱    ╲───────────
      │
    0 │─────────────────────────→ Time

Soak Test:
Users ▲
  100 │  ┌──────────────────────────────┐
      │  │          4-12 hours          │
    0 │──┘                              └──
      └────────────────────────────────────→ Time

Breakpoint Test:
Users ▲
  ??? │                              ╱ ← System breaks here
      │                           ╱
      │                        ╱
      │                     ╱
      │                  ╱
    0 │───────────────╱───────────────→ Time
      Continuously increasing until failure
```

### Results Interpretation

```
Key metrics to analyze:
│
├─ Latency
│  ├─ p50 (median): typical user experience
│  ├─ p95: most users' worst experience
│  ├─ p99: tail latency (1 in 100 requests)
│  ├─ p99.9: extreme tail (important at scale)
│  └─ Compare: p99/p50 ratio > 10x suggests systemic issue
│
├─ Throughput
│  ├─ Requests per second (RPS)
│  ├─ Compare achieved vs target rate
│  ├─ If achieved < target: server saturated
│  └─ Watch for throughput plateau (max capacity reached)
│
├─ Error Rate
│  ├─ HTTP 5xx errors: server failures
│  ├─ HTTP 429 errors: rate limiting
│  ├─ Timeouts: resource exhaustion
│  ├─ Connection refused: port/socket exhaustion
│  └─ Target: <0.1% under normal load
│
├─ Resource Utilization
│  ├─ CPU: >80% sustained = at capacity
│  ├─ Memory: growing = leak, high = needs more RAM
│  ├─ Disk I/O: iowait >20% = I/O bottleneck
│  ├─ Network: check bandwidth, connection count
│  └─ Connection pools: active/waiting/idle ratios
│
└─ Saturation Point
   ├─ Where latency starts increasing non-linearly
   ├─ Where error rate begins climbing
   ├─ Where throughput plateaus despite more load
   └─ This is your system's practical capacity
```

### Common Findings and Fixes

| Finding | Symptom | Root Cause | Fix |
|---------|---------|------------|-----|
| Latency spike at load | p99 jumps at N users | Connection pool exhaustion | Increase pool size, optimize queries |
| Throughput plateau | RPS flat despite more VUs | CPU saturation | Optimize hot paths, scale horizontally |
| Error rate climbs gradually | 5xx increases with load | Memory leak under load | Fix leak, increase memory, add limits |
| Timeout cascade | Many timeouts after first | No circuit breaker | Add circuit breaker, retry with backoff |
| Uneven distribution | Some pods idle, some overloaded | Bad load balancing | Fix health checks, use least-connections |
| GC pauses | Periodic latency spikes | Large heap, GC pressure | Reduce allocations, tune GC, smaller heap |
| DNS resolution | Intermittent slow requests | DNS lookup on every request | Connection pooling, DNS caching |
| TLS handshake overhead | High latency on first request | No connection reuse | Keep-alive, connection pooling |

## CI Integration

### Performance Budgets

```javascript
// k6 thresholds as CI gates.
// abortOnFail stops the run as soon as a threshold is crossed.
// delayAbortEval postpones that evaluation so a handful of slow warm-up
// samples cannot abort the test before enough data has been collected.
export const options = {
  thresholds: {
    http_req_duration: [
      { threshold: 'p(95)<500', abortOnFail: true, delayAbortEval: '10s' },
      { threshold: 'p(99)<1500', abortOnFail: true, delayAbortEval: '10s' },
    ],
    http_req_failed: [
      { threshold: 'rate<0.01', abortOnFail: true, delayAbortEval: '10s' },
    ],
    checks: [
      { threshold: 'rate>0.99', abortOnFail: true, delayAbortEval: '10s' },
    ],
  },
};
```

### GitHub Actions Example

```yaml
# .github/workflows/load-test.yml
name: Load Test
on:
  pull_request:
    paths: ['src/**', 'package.json']

jobs:
  load-test:
    runs-on: ubuntu-latest
    services:
      app:
        image: myapp:${{ github.sha }}
        ports:
          - 8080:8080
    steps:
      - uses: actions/checkout@v4

      - name: Install k6
        run: |
          sudo gpg -k
          sudo gpg --no-default-keyring --keyring /usr/share/keyrings/k6-archive-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys C5AD17C747E3415A3642D57D77C6C491D6AC1D68
          echo "deb [signed-by=/usr/share/keyrings/k6-archive-keyring.gpg] https://dl.k6.io/deb stable main" | sudo tee /etc/apt/sources.list.d/k6.list
          sudo apt-get update
          sudo apt-get install k6

      - name: Run load test
        # --out json streams raw metric samples (one JSON object per line) and has
        # no aggregated percentiles; --summary-export writes the end-of-test
        # aggregates that the baseline comparison reads.
        run: k6 run --out json=results.json --summary-export=summary.json tests/load/api-test.js

      - name: Compare with baseline
        run: |
          # Extract p95 from the end-of-test summary
          P95=$(jq -r '.metrics.http_req_duration["p(95)"]' summary.json)
          if [ -z "$P95" ] || [ "$P95" = "null" ]; then
            echo "::error::could not read http_req_duration p(95) from summary.json"
            exit 1
          fi
          BASELINE=450  # ms
          if (( $(echo "$P95 > $BASELINE" | bc -l) )); then
            echo "::error::p95 latency regression: ${P95}ms > ${BASELINE}ms baseline"
            exit 1
          fi

      - name: Upload results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: load-test-results
          path: results.json
```

### Baseline Comparison Strategy

```
Performance regression detection:
│
├─ Establish baseline
│  ├─ Run load test on main branch after each merge
│  ├─ Store results in a time-series DB or artifact storage
│  └─ Track p50, p95, p99, throughput, error rate
│
├─ PR comparison
│  ├─ Run same test on PR branch
│  ├─ Compare against baseline
│  ├─ Alert if metrics degrade beyond threshold
│  └─ Common thresholds: >10% p95 increase, >5% throughput decrease
│
├─ Statistical significance
│  ├─ Run test multiple times (3-5x) to account for noise
│  ├─ Use statistical tests (t-test) to confirm regression
│  └─ Avoid false positives from system noise
│
└─ Trend tracking
   ├─ Plot metrics over time across releases
   ├─ Catch gradual degradation that per-PR tests miss
   └─ Set alerts for multi-week trends
```

### Test Data Management

```
Realistic test data:
│
├─ Data volume
│  ├─ Match production data volume (or representative subset)
│  ├─ Empty DB gives misleadingly good results
│  └─ Index effectiveness depends on data distribution
│
├─ Data variety
│  ├─ Use parameterized inputs (not same request every time)
│  ├─ Vary payload sizes
│  ├─ Include edge cases (long strings, Unicode, special chars)
│  └─ Distribute IDs to avoid cache hot-spotting
│
├─ Data isolation
│  ├─ Each test run should use clean or isolated data
│  ├─ Tests that modify data should not affect next run
│  ├─ Use database transactions/rollback or test-specific namespaces
│  └─ Avoid accumulating data across test runs
│
└─ Data generation
   ├─ k6: use SharedArray for CSV/JSON data files
   ├─ Artillery: use CSV feeders, custom functions
   ├─ Locust: use Python libraries (Faker) for realistic data
   └─ General: pre-generate data, load before test
```