loop-estimate.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. #!/usr/bin/env python3
  2. """Estimate the token/$ cost of an outer loop by pattern × cadence × model.
  3. A loop's cost is runs/day × tokens/run × price, and sub-agents multiply tokens/run.
  4. This computes that - and, crucially, models **prompt caching**: a loop re-sends the
  5. SAME run.md + system prefix every tick (the Ralph property), which is the textbook
  6. caching case. Whether caching helps depends on cadence vs cache TTL, so this picks the
  7. TTL and reports the cached projection alongside the naive one.
  8. Pricing reads from assets/model-pricing.json (date-stamped; skills/claude-api-ops is
  9. the source of truth - run its check-model-table.py if you suspect drift).
  10. Usage: loop-estimate.py --pattern P --cadence C --model M [OPTIONS]
  11. Input: argv flags only (no stdin).
  12. Output: stdout = the cost breakdown (plain rows, or --json envelope). Data only.
  13. Stderr: the assumptions + caching note, errors.
  14. Exit: 0 ok, 2 usage, 3 pricing file missing, 4 bad cadence/model/pattern or unreadable pricing file
  15. Estimates, not guarantees - reconcile against the loop's run-log.md actuals. Levers in
  16. order of impact: cadence (halving frequency halves cost), prompt caching (model below),
  17. model tier.
  18. Examples:
  19. loop-estimate.py --pattern pr-watch --cadence 10m --model claude-haiku-4-5
  20. loop-estimate.py --pattern ci-watch --cadence 15m --model claude-sonnet-4-6 --days 30 --json
  21. loop-estimate.py --pattern daily-scan --cadence 6h --model claude-opus-4-8 # too slow to cache
  22. loop-estimate.py --list-models
  23. """
  24. from __future__ import annotations
  25. import argparse
  26. import json
  27. import os
  28. import re
  29. import sys
  30. from pathlib import Path
# Process exit codes returned by main() or raised via SystemExit by the helpers.
EX_OK = 0
EX_USAGE = 2  # argparse rejected the command line
EX_NOTFOUND = 3  # pricing file missing
EX_VALIDATION = 4  # bad cadence/model/pattern, or an unreadable pricing file
# Default pricing table: assets/model-pricing.json, one directory above this script's folder.
DEFAULT_PRICING = Path(__file__).resolve().parent.parent / "assets" / "model-pricing.json"
# Prompt-caching multipliers vs base input price (claude-api-ops/references/caching-and-cost.md).
CACHE_WRITE_5M = 1.25  # write a 5-minute-TTL entry
CACHE_WRITE_1H = 2.0  # write a 1-hour-TTL entry
CACHE_READ = 0.1  # read any cached entry
# Minimum cacheable prefix (tokens) - below this the cache_control marker is silently
# ignored (caching-and-cost.md). A loop whose static prefix is smaller can't cache.
MIN_PREFIX = {
    "claude-fable-5": 512,
    "claude-opus-4-8": 1024,
    "claude-sonnet-4-6": 1024,
    "claude-haiku-4-5": 4096,
}
# Fallback minimum used for any model id that is not a key of MIN_PREFIX.
DEFAULT_MIN_PREFIX = 1024
  49. class Term:
  50. """Minimal ANSI helper (term.sh is bash-only; per TERMINAL-DESIGN.md §9 the Python
  51. port is inline). Honors FORCE_COLOR / NO_COLOR / TERM_ASCII and the bound stream's
  52. TTY + encoding, so piped data stays plain ASCII."""
  53. _C = {"green": "\033[32m", "cyan": "\033[36m", "dim": "\033[2m", "off": "\033[0m"}
  54. def __init__(self, stream=sys.stderr):
  55. enc = (getattr(stream, "encoding", "") or "").lower()
  56. self.ascii = os.environ.get("TERM_ASCII") == "1" or "utf" not in enc
  57. if os.environ.get("FORCE_COLOR"):
  58. self.color = True
  59. elif (os.environ.get("NO_COLOR") is not None
  60. or os.environ.get("TERM") == "dumb"
  61. or not getattr(stream, "isatty", lambda: False)()):
  62. self.color = False
  63. else:
  64. self.color = True
  65. def c(self, name, text):
  66. return f"{self._C.get(name,'')}{text}{self._C['off']}" if self.color else text
  67. def load_pricing(path: Path) -> dict:
  68. if not path.is_file():
  69. print(f"error: pricing file not found: {path}", file=sys.stderr)
  70. raise SystemExit(EX_NOTFOUND)
  71. try:
  72. return json.loads(path.read_text(encoding="utf-8"))
  73. except (json.JSONDecodeError, OSError) as exc:
  74. print(f"error: could not read pricing file: {exc}", file=sys.stderr)
  75. raise SystemExit(EX_VALIDATION)
  76. def runs_per_day(cadence: str, override: float | None) -> float:
  77. """Translate a cadence into runs/day. Supports Nm/Nh/Nd and the common cron
  78. forms `*/N * * * *` and `N * * * *`. --runs-per-day overrides everything."""
  79. if override is not None:
  80. if override <= 0:
  81. print("error: --runs-per-day must be positive", file=sys.stderr)
  82. raise SystemExit(EX_VALIDATION)
  83. return float(override)
  84. s = cadence.strip()
  85. m = re.fullmatch(r"(\d+)([mhd])", s)
  86. if m:
  87. n = int(m.group(1))
  88. if n <= 0:
  89. print(f"error: cadence value must be positive (got '{cadence}')", file=sys.stderr)
  90. raise SystemExit(EX_VALIDATION)
  91. return {"m": 1440.0, "h": 24.0, "d": 1.0}[m.group(2)] / n
  92. cron_min = re.fullmatch(r"\*/(\d+) \* \* \* \*", s)
  93. if cron_min:
  94. n = int(cron_min.group(1))
  95. return 1440.0 / n if n > 0 else 1440.0
  96. if re.fullmatch(r"\d+ \* \* \* \*", s):
  97. return 24.0
  98. print(
  99. f"error: cannot derive runs/day from cadence '{cadence}' - "
  100. "use Nm/Nh/Nd, `*/N * * * *`, or pass --runs-per-day",
  101. file=sys.stderr,
  102. )
  103. raise SystemExit(EX_VALIDATION)
  104. def caching_projection(in_tok, out_tok, sub, in_price, out_price, rpd, model,
  105. prefix_frac, ttl_choice):
  106. """Model prompt-caching of the static run-prompt prefix across ticks.
  107. Returns a dict: ttl, beneficial, reason, cost_per_run/day, prefix_tokens.
  108. The cache stays warm only when the tick interval is <= the TTL (reads refresh it);
  109. a loop slower than the 1h max TTL writes a cold entry every tick - caching can't help.
  110. """
  111. interval_min = 1440.0 / rpd if rpd > 0 else 1e9
  112. prefix_tokens = int(round(in_tok * prefix_frac))
  113. variable_in = in_tok - prefix_tokens
  114. min_prefix = MIN_PREFIX.get(model, DEFAULT_MIN_PREFIX)
  115. # Pick TTL: smallest that stays warm at this cadence.
  116. if ttl_choice == "5m":
  117. ttl, warm = "5m", interval_min <= 5
  118. elif ttl_choice == "1h":
  119. ttl, warm = "1h", interval_min <= 60
  120. else: # auto
  121. if interval_min <= 5:
  122. ttl, warm = "5m", True
  123. elif interval_min <= 60:
  124. ttl, warm = "1h", True
  125. else:
  126. ttl, warm = None, False
  127. out_cost_day = out_tok / 1e6 * out_price * rpd
  128. if prefix_tokens < min_prefix:
  129. return {"ttl": ttl, "beneficial": False,
  130. "reason": f"static prefix ~{prefix_tokens} tok < {model} minimum {min_prefix} tok "
  131. "- cache marker silently ignored; enlarge the run prompt/system or skip caching",
  132. "prefix_tokens": prefix_tokens, "cost_per_day": None, "cost_per_run": None}
  133. if not warm or ttl is None:
  134. return {"ttl": ttl, "beneficial": False,
  135. "reason": f"tick interval ~{interval_min:.0f} min exceeds the cache TTL "
  136. "- the entry expires between ticks, so every tick is a cold write; caching won't help",
  137. "prefix_tokens": prefix_tokens, "cost_per_day": None, "cost_per_run": None}
  138. write_mult = CACHE_WRITE_5M if ttl == "5m" else CACHE_WRITE_1H
  139. # Per day, warm: ~1 cache write of the prefix + (rpd-1) reads; variable input + output full price.
  140. prefix_day = prefix_tokens / 1e6 * in_price * (write_mult + max(rpd - 1, 0) * CACHE_READ)
  141. variable_day = variable_in / 1e6 * in_price * rpd
  142. cost_day = (prefix_day + variable_day + out_cost_day) * sub
  143. return {"ttl": ttl, "beneficial": True, "reason": "",
  144. "prefix_tokens": prefix_tokens, "write_mult": write_mult,
  145. "cost_per_day": cost_day, "cost_per_run": cost_day / rpd if rpd else cost_day}
  146. def fmt_money(x: float) -> str:
  147. if x < 1:
  148. return f"${x:.4f}"
  149. return f"${x:,.2f}"
  150. def main(argv: list[str]) -> int:
  151. p = argparse.ArgumentParser(
  152. prog="loop-estimate.py",
  153. description="Estimate outer-loop cost by pattern × cadence × model, with prompt caching.",
  154. )
  155. p.add_argument("--pattern", default="custom", help="catalog pattern key (default: custom)")
  156. p.add_argument("--cadence", default="1h", help="10m | 1h | 6h | 1d, or a cron string (default: 1h)")
  157. p.add_argument("--model", default="claude-haiku-4-5", help="model id (default: claude-haiku-4-5)")
  158. p.add_argument("--days", type=int, default=30, help="horizon in days for the total (default: 30)")
  159. p.add_argument("--runs-per-day", type=float, default=None, help="override the cadence-derived runs/day")
  160. p.add_argument("--input-tokens", type=int, default=None, help="override per-run input tokens")
  161. p.add_argument("--output-tokens", type=int, default=None, help="override per-run output tokens")
  162. p.add_argument("--subagents", type=int, default=None, help="override the sub-agent fan-out multiplier")
  163. p.add_argument("--cache-prefix-frac", type=float, default=0.6,
  164. help="fraction of input that is the static, cacheable run-prompt prefix (default: 0.6)")
  165. p.add_argument("--cache-ttl", choices=["auto", "5m", "1h"], default="auto",
  166. help="cache TTL to model (default: auto - pick by cadence)")
  167. p.add_argument("--no-cache", action="store_true", help="report the uncached cost only")
  168. p.add_argument("--pricing", default=str(DEFAULT_PRICING), help="path to model-pricing.json")
  169. p.add_argument("--list-models", action="store_true", help="print the pricing table + as-of date, exit 0")
  170. p.add_argument("--json", action="store_true", help="emit a JSON envelope")
  171. try:
  172. args = p.parse_args(argv)
  173. except SystemExit as exc:
  174. return EX_USAGE if exc.code not in (0, None) else (exc.code or EX_OK)
  175. pricing = load_pricing(Path(args.pricing))
  176. models = pricing.get("models", {})
  177. as_of = pricing.get("_as_of", "unknown")
  178. pattern_defaults = pricing.get("_pattern_defaults", {})
  179. if args.list_models:
  180. if args.json:
  181. print(json.dumps({"data": models, "meta": {"as_of": as_of, "schema": "claude-mods.loop-ops.pricing/v1"}}, indent=2))
  182. else:
  183. print(f"{'model':<22}{'input $/MTok':>14}{'output $/MTok':>16}")
  184. for mid, pr in models.items():
  185. print(f"{mid:<22}{pr.get('input_per_mtok', 0):>14.2f}{pr.get('output_per_mtok', 0):>16.2f}")
  186. print(f"\n(as of {as_of}; source of truth: claude-api-ops)", file=sys.stderr)
  187. return EX_OK
  188. if args.days <= 0:
  189. print("error: --days must be positive", file=sys.stderr)
  190. return EX_VALIDATION
  191. if not (0.0 <= args.cache_prefix_frac <= 1.0):
  192. print("error: --cache-prefix-frac must be between 0 and 1", file=sys.stderr)
  193. return EX_VALIDATION
  194. if args.model not in models:
  195. print(f"error: unknown model '{args.model}' - known: {', '.join(models) or '(none)'}", file=sys.stderr)
  196. return EX_VALIDATION
  197. in_price = float(models[args.model]["input_per_mtok"])
  198. out_price = float(models[args.model]["output_per_mtok"])
  199. if args.input_tokens is not None and args.output_tokens is not None:
  200. in_tok, out_tok = args.input_tokens, args.output_tokens
  201. sub = args.subagents if args.subagents is not None else 1
  202. elif args.pattern in pattern_defaults and not args.pattern.startswith("_"):
  203. d = pattern_defaults[args.pattern]
  204. in_tok = args.input_tokens if args.input_tokens is not None else int(d["input"])
  205. out_tok = args.output_tokens if args.output_tokens is not None else int(d["output"])
  206. sub = args.subagents if args.subagents is not None else int(d.get("subagents", 1))
  207. else:
  208. print(
  209. f"error: unknown pattern '{args.pattern}' - pass --input-tokens and "
  210. f"--output-tokens, or use one of: {', '.join(k for k in pattern_defaults if not k.startswith('_'))}",
  211. file=sys.stderr,
  212. )
  213. return EX_VALIDATION
  214. if min(in_tok, out_tok, sub) < 0:
  215. print("error: token counts and --subagents must be non-negative", file=sys.stderr)
  216. return EX_VALIDATION
  217. rpd = runs_per_day(args.cadence, args.runs_per_day)
  218. # ── uncached (naive) ──
  219. cost_in = in_tok / 1_000_000 * in_price
  220. cost_out = out_tok / 1_000_000 * out_price
  221. cost_run = (cost_in + cost_out) * sub
  222. tokens_run = (in_tok + out_tok) * sub
  223. cost_day = cost_run * rpd
  224. cost_horizon = cost_day * args.days
  225. # ── cached projection ──
  226. cache = None
  227. if not args.no_cache:
  228. cache = caching_projection(in_tok, out_tok, sub, in_price, out_price, rpd,
  229. args.model, args.cache_prefix_frac, args.cache_ttl)
  230. if args.json:
  231. data = {
  232. "pattern": args.pattern, "model": args.model, "cadence": args.cadence,
  233. "runs_per_day": round(rpd, 3), "tokens_per_run": tokens_run,
  234. "input_tokens": in_tok, "output_tokens": out_tok, "subagents": sub,
  235. "cost_per_run": round(cost_run, 6), "cost_per_day": round(cost_day, 4),
  236. "days": args.days, "cost_per_horizon": round(cost_horizon, 2),
  237. }
  238. if cache is not None:
  239. if cache["beneficial"]:
  240. cd = cache["cost_per_day"]
  241. data["caching"] = {
  242. "beneficial": True, "ttl": cache["ttl"], "prefix_tokens": cache["prefix_tokens"],
  243. "cost_per_day": round(cd, 4), "cost_per_horizon": round(cd * args.days, 2),
  244. "savings_pct": round((cost_day - cd) / cost_day * 100, 1) if cost_day else 0.0,
  245. }
  246. else:
  247. data["caching"] = {"beneficial": False, "reason": cache["reason"],
  248. "prefix_tokens": cache["prefix_tokens"]}
  249. print(json.dumps({"data": data, "meta": {"as_of": as_of, "schema": "claude-mods.loop-ops.estimate/v1"}}, indent=2))
  250. return EX_OK
  251. t = Term(sys.stderr)
  252. print(f"{'pattern:':<16}{args.pattern}")
  253. print(f"{'model:':<16}{args.model}")
  254. print(f"{'cadence:':<16}{args.cadence} -> {rpd:g} runs/day")
  255. print(f"{'tokens/run:':<16}{tokens_run:,} ({in_tok:,} in + {out_tok:,} out) x {sub} subagent(s)")
  256. print(f"{'cost/run:':<16}{fmt_money(cost_run)}")
  257. print(f"{'cost/day:':<16}{fmt_money(cost_day)}")
  258. print(f"{'cost/'+str(args.days)+'d:':<16}{fmt_money(cost_horizon)} (uncached)")
  259. if cache is not None:
  260. if cache["beneficial"]:
  261. cd, ch = cache["cost_per_day"], cache["cost_per_day"] * args.days
  262. save = (cost_day - cd) / cost_day * 100 if cost_day else 0.0
  263. print(f"{'cached/'+str(args.days)+'d:':<16}{t.c('cyan', fmt_money(ch))} "
  264. f"({t.c('green', f'-{save:.0f}%')}, TTL {cache['ttl']}, prefix ~{cache['prefix_tokens']:,} tok)")
  265. print(f"recommendation: cache the static run.md+system prefix at TTL {cache['ttl']} "
  266. f"-> ~-{save:.0f}%/mo. Keep run.md BYTE-IDENTICAL every tick or the cache never hits.",
  267. file=sys.stderr)
  268. else:
  269. print(f"caching: not beneficial here", file=sys.stderr)
  270. print(f" why: {cache['reason']}", file=sys.stderr)
  271. print(f"estimate (as of {as_of} pricing) - reconcile against run-log.md actuals; "
  272. "cadence is the biggest lever, then caching, then model tier", file=sys.stderr)
  273. return EX_OK
# Script entry point: run main() on the command-line arguments (program name
# stripped) and use its return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))