1 week ago · b71a10ede6
--- a/skills/loop-ops/SKILL.md
+++ b/skills/loop-ops/SKILL.md
@@ -277,6 +277,18 @@ python scripts/check-pricing-sync.py --offline   # exit 0 in sync, 10 drift, 3 a
 
				 8. **Read the reports.** Only after the loop's judgment is proven do you graduate it to
			
 
				    **L2** (worktree + guard + `fleet-ops` landing) and re-audit at the higher tier.
			
 
				 
			
 
				+## Worked example
			
 
				+
			
 
				+A complete, **audit + doctor-clean** L1 loop ships at
			
 
				+[assets/examples/pr-babysitter/](assets/examples/pr-babysitter/): a filled
			
 
				+`loop.config.yaml`, a *populated* `STATE.md`, the `run.md` run prompt, a sample
			
 
				+`run-log.md`, and `github-actions.yml` — the scheduler with the **kill-switch gate and
			
 
				+`dontAsk` + allowlist profile baked in**. Copy the dir to `.loops/pr-babysitter/`, adjust scope/cadence, run
			
 
				+`loop-audit` + `loop-doctor --live`, wire the workflow. The other patterns don't ship as
			
 
				+static dirs that rot — `loop-init --pattern <name>` *generates* the same, seeded and
			
 
				+gate-clean, for any pattern at any tier. CI runs `loop-audit` + `loop-doctor --offline` on this
			
 
				+example every build, so it can't drift out of validity.
			
 
				+
			
 
				 ## Anti-patterns (these are detected and wrong)
			
 
				 
			
 
				 - **Routing around the gate.** Wrapping `claude -p --permission-mode bypassPermissions`
			
--- a/skills/loop-ops/assets/examples/pr-babysitter/STATE.md
+++ b/skills/loop-ops/assets/examples/pr-babysitter/STATE.md
@@ -0,0 +1,21 @@
 
				+# pr-babysitter — STATE
			
 
				+_Updated: 2026-06-22T14:05:00Z · run #142 · readiness 100/100_
			
 
				+
			
 
				+<!-- Read first every run: check the kill switch, then act on the Priority list.
			
 
				+     This is a realistic populated snapshot — yours starts from STATE.template.md. -->
			
 
				+
			
 
				+## Priority   (act on these next)
			
 
				+- [P1] PR #412 failing CI 3h10m — `build` job red, looks like a flaky e2e; owner @dana pinged 14:02
			
 
				+- [P1] PR #408 merge conflict with main (settings.ts) — author notified, awaiting rebase
			
 
				+- [P2] PR #415 awaiting review 5h — past the 4h threshold; nudged the reviewers group
			
 
				+
			
 
				+## Watch      (not yet actionable)
			
 
				+- PR #417 awaiting review 1h12m — under threshold, recheck next run
			
 
				+- PR #410 draft — skip until marked ready
			
 
				+
			
 
				+## Noise      (seen + dismissed this run)
			
 
				+- PR #419 opened 6m ago, checks still running — too early
			
 
				+- PR #402 already merged since last run — drop from tracking
			
 
				+
			
 
				+---
			
 
				+_Source: .github/workflows/pr-babysitter.yml · config: loop.config.yaml_
			
--- a/skills/loop-ops/assets/examples/pr-babysitter/github-actions.yml
+++ b/skills/loop-ops/assets/examples/pr-babysitter/github-actions.yml
@@ -0,0 +1,59 @@
 
				+# EXAMPLE scheduler for the pr-babysitter L1 loop — the "clone-and-run" glue.
			
 
				+# Copy to .github/workflows/pr-babysitter.yml, PIN the action/CLI versions, add the
			
 
				+# ANTHROPIC_API_KEY secret. The SCHEDULER is the authorizer (no auto-mode session in the
			
 
				+# loop), and the child runs gated (--permission-mode dontAsk + a narrow allowlist), never
			
 
				+# bypassPermissions on a shared runner. See references/claude-code-loops.md.
			
 
				+name: pr-babysitter
			
 
				+on:
			
 
				+  schedule:
			
 
				+    - cron: "*/10 * * * *"   # every 10 min (matches loop.config.yaml cadence: 10m)
			
 
				+  workflow_dispatch: {}
			
 
				+
			
 
				+permissions:
			
 
				+  contents: write            # commit STATE.md / run-log.md back
			
 
				+  pull-requests: write       # post the at-most-one summary comment (L1 stays report-only)
			
 
				+
			
 
				+concurrency:
			
 
				+  group: pr-babysitter       # never overlap two ticks
			
 
				+  cancel-in-progress: false
			
 
				+
			
 
				+jobs:
			
 
				+  tick:
			
 
				+    runs-on: ubuntu-latest
			
 
				+    steps:
			
 
				+      - uses: actions/checkout@v4   # <-- pin to a SHA in production
			
 
				+
			
 
				+      # Kill switch: a 'loop-pause' label on the repo, or a committed PAUSED sentinel.
			
 
				+      - name: Honor the kill switch
			
 
				+        id: gate
			
 
				+        env: { GH_TOKEN: "${{ github.token }}" }
			
 
				+        run: |
			
 
				+          if [ -f .loops/pr-babysitter/PAUSED ]; then echo "paused=1" >> "$GITHUB_OUTPUT"; fi
			
 
				+          if gh label list --limit 100 | grep -qi '^loop-pause'; then echo "paused=1" >> "$GITHUB_OUTPUT"; fi
			
 
				+
			
 
				+      - name: Install Claude Code
			
 
				+        if: steps.gate.outputs.paused != '1'
			
 
				+        run: npm i -g @anthropic-ai/claude-code   # <-- pin a version
			
 
				+
			
 
				+      # The run: same prompt every tick (cache-friendly), gated with dontAsk + an
			
 
				+      # allowlist scoped to exactly what an L1 report loop needs (read-only + gh + STATE writes).
			
 
				+      - name: Run one tick
			
 
				+        if: steps.gate.outputs.paused != '1'
			
 
				+        env:
			
 
				+          ANTHROPIC_API_KEY: "${{ secrets.ANTHROPIC_API_KEY }}"
				+          GH_TOKEN: "${{ github.token }}"   # the tick's gh calls need a token too, same as the gate step
			
 
				+        run: |
			
 
				+          cd .loops/pr-babysitter
			
 
				+          claude -p "$(cat run.md)" \
			
 
				+            --permission-mode dontAsk \
			
 
				+            --append-system-prompt "$(cat STATE.md)" \
			
 
				+            --allowedTools 'Bash(gh pr list:*)' 'Bash(gh pr view:*)' 'Bash(gh pr comment:*)' 'Bash(gh label list:*)' 'Read' 'Write(STATE.md)' 'Write(run-log.md)' \
			
 
				+            --max-turns 30
			
 
				+
			
 
				+      - name: Persist STATE + run-log
			
 
				+        if: steps.gate.outputs.paused != '1'
			
 
				+        run: |
			
 
				+          git config user.name  "pr-babysitter-loop"
			
 
				+          git config user.email "loop@users.noreply.github.com"
			
 
				+          git add .loops/pr-babysitter/STATE.md .loops/pr-babysitter/run-log.md
			
 
				+          git diff --cached --quiet || git commit -m "chore(loop): pr-babysitter tick $(date -u +%FT%TZ)"
			
 
				+          git push
			
--- a/skills/loop-ops/assets/examples/pr-babysitter/loop.config.yaml
+++ b/skills/loop-ops/assets/examples/pr-babysitter/loop.config.yaml
@@ -0,0 +1,21 @@
 
				+# WORKED EXAMPLE — a complete, audit-clean L1 loop. Copy the dir, adjust scope/cadence,
			
 
				+# run loop-audit + loop-doctor --live, then wire the scheduler (github-actions.yml).
			
 
				+# The other patterns: `loop-init --pattern <name>` generates the same, seeded.
			
 
				+name: pr-babysitter
			
 
				+pattern: pr-babysitter
			
 
				+tier: L1
			
 
				+permission_mode: dontAsk
			
 
				+cadence: 10m
			
 
				+goal: "Watch open PRs; flag stuck (no review > 4h), failing checks, and merge conflicts in STATE.md; post at most one summary comment per PR; never merge."
			
 
				+scope:
			
 
				+  - "src/**"
			
 
				+escalation: "a human reviews and merges; never merge to main; never push; never close a PR"
			
 
				+budget_tokens: 60000
			
 
				+kill_switch: ".loops/pr-babysitter/PAUSED exists, OR the 'loop-pause' label is on the repo"
			
 
				+
			
 
				+# ── graduate to L2 (assisted: open a fix-the-PR-description / rebase branch) ──
			
 
				+# tier: L2
			
 
				+# verify: "npm test"
			
 
				+# guard: "npm run typecheck"
			
 
				+# worktree: true
			
 
				+# land_via: fleet-ops
			
--- a/skills/loop-ops/assets/examples/pr-babysitter/run-log.md
+++ b/skills/loop-ops/assets/examples/pr-babysitter/run-log.md
@@ -0,0 +1,5 @@
 
				+# pr-babysitter — run log (append-only; one line per run)
			
 
				+# format: <ISO-Z>  run#N  action=<reported|none>  pr=<n|->  outcome=<…>  tokens=<N>
			
 
				+2026-06-22T14:05:00Z  run#142  action=reported  pr=412  outcome=commented-flaky-ci   tokens=14820
			
 
				+2026-06-22T13:55:00Z  run#141  action=reported  pr=408  outcome=flagged-conflict      tokens=12110
			
 
				+2026-06-22T13:45:00Z  run#140  action=none      pr=-    outcome=quiet                 tokens=2090
			
--- a/skills/loop-ops/assets/examples/pr-babysitter/run.md
+++ b/skills/loop-ops/assets/examples/pr-babysitter/run.md
@@ -0,0 +1,25 @@
 
				+<!--
			
 
				+run.md — fed to `claude -p` each tick. SAME every run (fresh context; state lives in
			
 
				+STATE.md + git, not the conversation). Keep it BYTE-IDENTICAL so the prompt cache hits.
			
 
				+Wired by github-actions.yml:
			
 
				+  claude -p "$(cat run.md)" --permission-mode dontAsk --append-system-prompt "$(cat STATE.md)"
			
 
				+-->
			
 
				+
			
 
				+# Run: pr-babysitter  (tier L1, report-only)
			
 
				+
			
 
				+You are one tick of a scheduled loop. Goal: **watch open PRs and report; never merge, push, or close.**
			
 
				+
			
 
				+## Do these in order
			
 
				+1. **Kill switch first.** If `.loops/pr-babysitter/PAUSED` exists or the repo has the `loop-pause` label, STOP — do nothing.
			
 
				+2. **Read `STATE.md`** (in your system prompt): the Priority / Watch / Noise lists from last run.
			
 
				+3. **List open PRs:** `gh pr list --state open --json number,title,reviewDecision,statusCheckRollup,mergeStateStatus,updatedAt`.
			
 
				+4. **Classify each** PR: failing checks · merge conflict · awaiting-review past 4h · draft · healthy.
			
 
				+5. **Report only.** You may post **at most one** summary comment per PR that newly needs attention (preview the text in the run log; never spam). You do **not** merge, push, rebase, or close — that escalates to a human.
			
 
				+6. **Respect the budget:** stop if you approach 60000 output tokens.
			
 
				+7. **Rewrite `STATE.md`:** move PRs across Priority / Watch / Noise; bump `_Updated_` + run number + readiness.
			
 
				+8. **Append one line to `run-log.md`:** `<ISO-Z>  run#N  action=<reported|none>  pr=<n|->  outcome=<…>  tokens=<N>`.
			
 
				+
			
 
				+## Hard rules
			
 
				+- Never merge/push/close/rebase — those are the escalation cases. A general goal is not authorization for them.
			
 
				+- Stay within scope (`src/**`); never touch another session's `.claude/worktrees/`.
			
 
				+- Leave the repo clean every tick.
			
--- a/skills/loop-ops/tests/run.sh
+++ b/skills/loop-ops/tests/run.sh
@@ -252,6 +252,15 @@ out="$("$PYTHON" "$SYNC" --json 2>/dev/null)"
 
				 expect_has "pricing-sync json schema" "claude-mods.loop-ops.pricing-sync/v1" "$out"
			
 
				 expect_has "pricing-sync json in_sync" '"in_sync": true' "$out"
			
 
				 
			
 
				+# ── worked example: the shipped example stays gate-clean ───────────────────
			
 
				+echo "-- worked example --"
			
 
				+EX="$SKILL/assets/examples/pr-babysitter/loop.config.yaml"
			
 
				+[[ -f "$EX" ]] && ok "worked example present" || no "worked example missing"
			
 
				+bash "$AUDIT" "$EX" >/dev/null 2>&1; expect_exit "shipped example audits clean -> 0" 0 $?
			
 
				+bash "$DOCTOR" --offline "$EX" >/dev/null 2>&1; expect_exit "shipped example doctors clean -> 0" 0 $?
			
 
				+[[ -f "$SKILL/assets/examples/pr-babysitter/github-actions.yml" ]] && ok "example ships a scheduler" || no "example missing scheduler"
			
 
				+[[ -f "$SKILL/assets/examples/pr-babysitter/run.md" ]] && ok "example ships a run prompt" || no "example missing run.md"
			
 
				+
			
 
				 # ── terminal design system ─────────────────────────────────────────────────
			
 
				 echo "-- terminal design system --"
			
 
				 for s in "$INIT" "$AUDIT" "$DOCTOR"; do
			
--- a/tests/check-resources.sh
+++ b/tests/check-resources.sh
@@ -65,6 +65,11 @@ echo "== loop-ops: pricing-sync verifier"
 
				 run "pricing-sync --offline in sync" 0 "$PY" skills/loop-ops/scripts/check-pricing-sync.py --offline
			
 
				 run "pricing-sync --help"            0 "$PY" skills/loop-ops/scripts/check-pricing-sync.py --help
			
 
				 
			
 
				+echo "== loop-ops: worked example is gate-clean (dogfood)"
			
 
				+LOOP_EX="skills/loop-ops/assets/examples/pr-babysitter/loop.config.yaml"
			
 
				+run "example audits clean"          0 bash skills/loop-ops/scripts/loop-audit.sh "$LOOP_EX"
			
 
				+run "example doctors clean (offline)" 0 bash skills/loop-ops/scripts/loop-doctor.sh --offline "$LOOP_EX"
			
 
				+
			
 
				 echo "== protocol: every new verifier is executable + compiles"
			
 
				 for s in skills/claude-api-ops/scripts/check-model-table.py \
			
 
				          skills/claude-code-ops/scripts/validate-hooks-json.py \